#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pickle

from gensim.models.word2vec import Word2Vec
from keras.models import model_from_yaml
from keras.preprocessing.sequence import pad_sequences

# Public API of this module.
# NOTE(review): 'db', 'jb' and 'model_factory' are not defined in this chunk —
# presumably they appear later in the file; confirm they exist before relying
# on star-imports of this module.
__all__ = ['load_word2vec', 'load_model', 'load_tokenizer', 'tokenize', 'db', 'jb', 'model_factory']


def load_model(model, weights):
    """Load a Keras model from a YAML architecture file plus a weights file.

    Args:
        model: Path to the YAML file describing the model architecture.
        weights: Path to the weights file accepted by ``Model.load_weights``.

    Returns:
        The reconstructed Keras model with its weights loaded.
    """
    # Explicit encoding: the YAML config may contain non-ASCII characters
    # (e.g. layer names), and the platform default encoding is not
    # guaranteed to be UTF-8.
    with open(model, 'r', encoding='utf-8') as fp:
        m = model_from_yaml(fp.read())
    m.load_weights(weights)
    return m


def load_tokenizer(tokenizer):
    """Deserialize and return a tokenizer from a pickle file.

    Args:
        tokenizer: Path to the pickled tokenizer file.

    Returns:
        The unpickled tokenizer object.
    """
    # NOTE(review): pickle.load is unsafe on untrusted input; this assumes
    # the file comes from a trusted training pipeline.
    with open(tokenizer, 'rb') as fp:
        return pickle.load(fp)


def load_word2vec(word2vec):
    """Restore a trained gensim Word2Vec model from disk.

    Args:
        word2vec: Path to the saved Word2Vec model file.

    Returns:
        The loaded ``Word2Vec`` instance.
    """
    model = Word2Vec.load(word2vec)
    return model


def tokenize(tokenizer, texts, max_len):
    """Convert raw texts into padded integer index sequences.

    Args:
        tokenizer: A fitted tokenizer exposing ``texts_to_sequences``.
        texts: Iterable of raw text strings.
        max_len: Target sequence length passed to ``pad_sequences``.

    Returns:
        The padded sequence array produced by ``pad_sequences``
        (one row per input text).
    """
    indexed = tokenizer.texts_to_sequences(texts)
    return pad_sequences(indexed, maxlen=max_len)
